* Session setup for the table-building script: reset Stata, raise size limits,
* add the project ado directory, fix the random seed and open the log file.
clear all
set more off
* Raise the matrix-size and variable-count limits while no data are in memory.
set matsize 10000
set maxvar 10000
* Append the gslab_tools directory to the ado search path.
* NOTE(review): presumably where preliminaries and matrix_to_txt live - confirm.
adopath + ../code/gslab_tools/

* preliminaries is not defined in this file; its side effects are not visible here.
* NOTE(review): doutf() looks like the output folder for tables - confirm.
preliminaries, doutf(../tables/)
* Fix the random-number seed so that later random steps are reproducible.
set seed 123

* Close any log left open by an earlier run (cap suppresses the error when none
* is open), then start a fresh log that overwrites the previous one.
cap log close
log using ../tables/log.log, replace
* Font used in graph windows.
graph set window fontface "Times New Roman"

****************
** Table 1 ****
****************
* Main estimates: regress each outcome measure on sponsor, weighting by weight,
* absorbing factor and clustering on cluster_group. Every regression appends one
* column holding: coefficient, standard error, weighted outcome mean, N and the
* sum of weights over the estimation sample.
clear matrix
use "../derived/Combined/combined_pair_level.dta", clear
drop if year==.

tempvar wsum
foreach depvar in new_relative stdz_outcome_all sig5_outcome best_outcome new_relative_effect_perc {
	* new_relative is reported twice: first without controls, then with them.
	* All other outcomes are reported with controls only.
	local spec_list "controls"
	if "`depvar'" == "new_relative" {
		local spec_list "none controls"
	}

	foreach spec of local spec_list {
		local rhs ""
		if "`spec'" == "controls" {
			local rhs "i.scale_group i.year_group"
		}

		summarize `depvar' [aw=weight]
		local depvar_mean `r(mean)'

		areg `depvar' sponsor `rhs' [aw=weight], a(factor) cluster(cluster_group)
		* Constant within the estimation sample, so its mean is the sum of weights
		egen `wsum' = total(weight) if e(sample)==1
		summarize `wsum'
		matrix combination_main_pair = nullmat(combination_main_pair), ///
			(_b[sponsor] \ _se[sponsor] \ `depvar_mean' \ `e(N)' \ `r(mean)' )
		drop `wsum'
	}
}
matrix list combination_main_pair
matrix_to_txt, saving(../tables/table1.txt) mat(combination_main_pair) format(%20.4f) title(<tab:table1>) append
	
	
****************
** Table 2  ****
****************
* Robustness checks for the new_relative specification. Each column stores the
* sponsor coefficient, its standard error, the weighted outcome mean and the
* sum of weights over the estimation sample.

clear matrix

* Per-column change to the baseline specification (columns 1-5):
*   1 baseline
*   2 adds i.order as a covariate
*   3 adds i.rel_yr_group as a covariate
*   4 restricts the sample with an if condition
*   5 baseline sample, but the regressor is sponsor_remove_coi
local robust1 ""
local robust2 "i.order"
local robust3 "i.rel_yr_group"
local robust4 "if year>=approve_year | year==."
local robust5 ""

use "../derived/Combined/combined_pair_level.dta", clear
drop if year==.
forvalues robust = 1/5 {
	* Columns 2 and 3 carry extra covariates, which cannot be passed to summarize;
	* the other columns carry either nothing or a sample restriction.
	if `robust'==2 | `robust'==3 {
		summ new_relative [aw=weight]
	}
	else {
		summ new_relative `robust`robust'' [aw=weight]
	}
	local mean `r(mean)'

	if `robust'==5 {
		areg new_relative sponsor_remove_coi i.scale_group i.year_group ///
			[aw=weight], a(factor) cluster(cluster_group)
		egen total = total(weight) if e(sample)==1
		summ total
		matrix combination_robust = nullmat(combination_robust), ///
			(_b[sponsor_remove_coi] \ _se[sponsor_remove_coi] \ `mean' \ `r(mean)' )
		drop total
	}
	else {
		areg new_relative sponsor i.scale_group i.year_group `robust`robust'' [aw=weight], a(factor) cluster(cluster_group)
		egen total = total(weight) if e(sample)==1
		summ total
		matrix combination_robust = nullmat(combination_robust), ///
			(_b[sponsor] \ _se[sponsor] \ `mean' \ `r(mean)' )
		drop total
	}
}

* Column 6: baseline specification with the weight multiplied by no_randomised.
* The specification is written out explicitly here. It previously expanded the
* robust macros using the loop index, which must not be read after the loop ends.
gen weight2 = no_randomised * weight
summ new_relative [aw=weight2]
local mean `r(mean)'

areg new_relative sponsor i.scale_group i.year_group [aw=weight2], a(factor) cluster(cluster_group)
* NOTE(review): the reported weighted N sums weight, not weight2 - confirm intended.
egen total = total(weight) if e(sample)==1
summ total
matrix combination_robust = nullmat(combination_robust), ///
	(_b[sponsor] \ _se[sponsor] \ `mean' \ `r(mean)' )
drop total

matrix list combination_robust
matrix_to_txt, saving(../tables/table2.txt) mat(combination_robust) ///
	format(%20.4f) title(<tab:table2>) append
	
	
****************
** Table 3  ****
****************
* Heterogeneity: the baseline specification re-estimated on six subsamples,
* defined by three indicator variables taken at 1 and then at 0.

clear matrix
use "../derived/Combined/combined_pair_level.dta", clear
drop if year==.

* Maximum of unsponsored_trial within each drug pair
bysort drug_pair_no: egen has_nonindustry_var = max(unsponsored_trial)

tempvar wsum
foreach split in active_placebo_pair anti_sample has_nonindustry_var {
	foreach level in 1 0 {
		* Weighted outcome mean within the subsample
		summarize new_relative if `split'==`level' [aw=weight]
		local sub_mean `r(mean)'

		areg new_relative sponsor i.scale_group i.year_group if `split'==`level' [aw=weight], a(factor) cluster(cluster_group)
		* Sum of weights over the estimation sample
		egen `wsum' = total(weight) if e(sample)==1
		summarize `wsum'
		matrix combination_hetero = nullmat(combination_hetero), ///
			(_b[sponsor] \ _se[sponsor] \ `sub_mean' \ `r(mean)' )
		drop `wsum'
	}
}
matrix list combination_hetero
matrix_to_txt, saving(../tables/table3.txt) mat(combination_hetero) format(%20.4f) title(<tab:table3>) append

****************
** Table 4  ****
****************
* Sensitivity of the sponsor coefficient to trial-characteristic controls
* (columns 1-4, absorbing factor_pair) and to finer absorbed groupings
* (columns 5-7). Each column stores the coefficient, its standard error, the
* weighted outcome mean and the sum of weights over the estimation sample.

clear matrix
use "../derived/Combined/combined_pair_level.dta", clear
drop if year==.


* Placebo rows get a dose of zero. For each characteristic, a _missing indicator
* records which values were missing before they are replaced by the mean of the
* non-missing values.
* NOTE(review): the _missing indicators are not part of any control list below -
* confirm that is intended.
replace dose_min = 0 if drug=="placebo"
foreach var of varlist mean_age share_female dose_min no_randomised dropout_share ///
	lengthoftrial stdz_baseline {
	gen `var'_missing = `var'==.
	summ `var'
	replace `var' = `r(mean)' if `var'==.
}


* Control sets for columns 1-4
local control1 ""
local control2 "lengthoftrial no_randomised dose_min"
local control3 "mean_age share_female stdz_baseline dropout_share"
local control4 "mean_age share_female stdz_baseline dropout_share lengthoftrial no_randomised dose_min"

forvalues control=1/4 {
	summ new_relative [aw=weight]
	local mean `r(mean)'

	areg new_relative sponsor `control`control'' i.scale_group i.year_group [aw=weight], a(factor_pair) cluster(cluster_group)
	egen total = total(weight) if e(sample)==1
	summ total
	matrix combination_chars = nullmat(combination_chars), (_b[sponsor] \ _se[sponsor] \ `mean' \ `r(mean)' )
	drop total
}


** Within Characteristics
* Bins of the trial characteristics. Only age_bin, female_bin and base_bin feed
* the groupings used in the regressions below.
xtile no_randomised_bin=no_randomised, nq(4)
xtile lengthoftrial_bin=lengthoftrial, nq(4)
gen age_bin=1 if mean_age<=60
replace age_bin=2 if mean_age>60
gen female_bin=1 if share_female<=50
replace female_bin=2 if share_female>50
xtile base_bin=stdz_baseline, nq(2)
xtile dropout_bin=dropout_share, nq(4)
xtile dose_bin=dose_min, nq(4)


* Groupings not referenced by the regressions in this section
egen factor_length_pair=group(drug_pair_no lengthoftrial_bin drug)
egen factor_n_pair = group(drug_pair_no drug no_randomised_bin)
egen factor_age_pair=group(drug_pair_no drug age_bin)
egen factor_female_pair=group(drug_pair_no drug female_bin)
egen factor_base_pair=group(drug_pair_no drug base_bin)
egen factor_dropout_pair=group(drug_pair_no drug dropout_bin)

* Groupings absorbed in columns 5-7, from coarsest to finest
egen factor_dose_pair = group(drug_pair_no drug dose_min)
egen factor_dosechar_pair=group(drug_pair_no drug dose_min age_bin female_bin)
egen factor_dosecharbase_pair=group(drug_pair_no drug dose_min age_bin female_bin base_bin)

local factor1 "factor_dose_pair"
local factor2 "factor_dosechar_pair"
local factor3 "factor_dosecharbase_pair"

forvalues factor=1/3 {
	summ new_relative [aw=weight]
	local mean `r(mean)'

	* No control macro is expanded here. The earlier reference used the index of
	* the control loop, which has already ended, so it always expanded to nothing.
	areg new_relative sponsor i.scale_group i.year_group [aw=weight], a(`factor`factor'') cluster(cluster_group)
	egen total = total(weight) if e(sample)==1
	summ total
	matrix combination_chars = nullmat(combination_chars), (_b[sponsor] \ _se[sponsor] \ `mean' \ `r(mean)' )
	drop total
}

matrix list combination_chars
matrix_to_txt, saving(../tables/table4.txt) mat(combination_chars) ///
	format(%20.4f) title(<tab:table4>) append
	

****************
** Table 5  ****
****************
* Step 1 of Table 5: fit outcome predictions on the arm-level data and save
* them to prediction.dta for use in the pair-level regressions that follow.

* Load arm-level rows with a non-missing year. Dose is set to zero where drug is
* placebo. For each characteristic, a _missing indicator records which values
* were missing before they are replaced by the mean of the non-missing values.
use "../derived/Combined/combined_arm_level.dta" if year!=., clear
replace dose_min = 0 if drug=="placebo"
foreach var of varlist mean_age share_female dose_min no_randomised dropout_share ////
	lengthoftrial stdz_baseline {
	gen `var'_missing = `var'==.
	summ `var'
	replace `var' = `r(mean)' if `var'==.
}

* LASSO prediction of stdz_outcome_all from all characteristics interacted with
* drug_group, the _missing indicators, and the group dummies. The linear
* prediction is stored in predict_all.
lasso linear stdz_outcome_all c.mean_age#i.drug_group c.share_female#i.drug_group ///
	c.dropout_share#i.drug_group c.stdz_baseline#i.drug_group *_missing ///
	c.dose_min#i.drug_group c.no_randomised#i.drug_group ////
	c.lengthoftrial#i.drug_group i.setting_group i.status_group i.center_group ///
	i.placebo_group i.scale_group i.year_group 
predict predict_all, xb

* One OLS prediction per characteristic: that characteristic interacted with
* drug_group, plus scale and year group dummies. The adjusted R-squared of each
* model is kept in a global so it survives the later change of dataset.
foreach var in no_randomised lengthoftrial dose_min stdz_baseline dropout_share ///
	mean_age share_female {
	reg stdz_outcome_all c.`var'#i.drug_group i.scale_group i.year_group 
	predict predict_`var', xb
	global predict_`var'_r2 = `e(r2_a)'
}

* Keep only the identifiers and the predictions, then save them for merging.
keep studyname drug unique_id predict*
save ../derived/Prediction/prediction.dta, replace

* Step 2 of Table 5: regress each predicted outcome, measured relative to the
* lowest prediction within pair_trial_no, on sponsor. Each column stores the
* coefficient, its standard error, the adjusted R-squared of the prediction
* model, the mean of the dependent variable and the sum of weights.
clear matrix
use "../derived/Combined/combined_pair_level.dta" if year!=., clear
merge m:1 studyname unique_id using ../derived/Prediction/prediction.dta, assert(3) nogen

tempvar wsum
local predictors no_randomised lengthoftrial dose_min stdz_baseline dropout_share mean_age share_female all
foreach p of local predictors {
	* Third row of the column: the stored adjusted R-squared, or missing for the
	* combined prediction, which has no stored value.
	local r2_cell ${predict_`p'_r2}
	if "`p'"=="all" {
		local r2_cell .
	}

	bysort pair_trial_no: egen min_predict_`p' = min(predict_`p')
	gen predict_`p'_relative = predict_`p' - min_predict_`p'

	areg predict_`p'_relative sponsor i.scale_group i.year_group [aw=weight], a(factor) cluster(cluster_group)

	* Sum of weights over the estimation sample
	egen `wsum' = total(weight) if e(sample)==1
	summarize `wsum'
	local weight_N = `r(mean)'
	drop `wsum'

	* Mean of the dependent variable over the estimation sample
	summarize predict_`p'_relative if e(sample)
	matrix prediction = nullmat(prediction), ///
		(_b[sponsor] \ _se[sponsor] \ `r2_cell' \ `r(mean)' \ `weight_N')
}
matrix list prediction

** Add bootstrapped standard errors
* For each of 100 replications, merge the matching columns of
* prediction_bstrap.dta, re-run the Table 5 regressions and keep the sponsor
* coefficients (one row per replication, one column per predictor). The standard
* deviation of each column then replaces the analytic standard-error row.
forvalues bstrap=1/100 {
	use "../derived/Combined/combined_pair_level.dta" if year!=., clear

	* NOTE(review): the merged variables are selected by their replication suffix
	* but are referenced below without it. Presumably this relies on variable-name
	* abbreviation - confirm against prediction_bstrap.dta.
	merge m:1 studyname unique_id using ../derived/Prediction/prediction_bstrap.dta, keepusing(*_`bstrap')

	foreach var in no_randomised lengthoftrial dose_min stdz_baseline dropout_share ///
		mean_age share_female all {
		bys pair_trial_no: egen min_predict_`var'=min(predict_`var')
		gen predict_`var'_relative = predict_`var' - min_predict_`var'

		areg predict_`var'_relative sponsor i.scale_group i.year_group [aw=weight] ///
			, a(factor) cluster(cluster_group)

		* Only the point estimate is kept from each replication, so the
		* weight total and dependent-variable mean are not computed here.
		matrix prediction_`bstrap' = nullmat(prediction_`bstrap'), (_b[sponsor])
	}
	matrix prediction_bstrap = nullmat(prediction_bstrap) \ prediction_`bstrap'
}

* Turn the replication-by-predictor matrix into data and take column-wise
* standard deviations.
clear
svmat prediction_bstrap
collapse (sd) prediction*
mkmat *, matrix(prediction_se)

* Final table: row 1 of the main estimates, the bootstrap standard errors, then
* rows 3-5 of the main estimates.
matrix prediction_bstrap = prediction[1,1...] \ prediction_se \ prediction[3,1...] \ prediction[4,1...] \ prediction[5,1...]
matrix list prediction_bstrap

matrix_to_txt, saving(../tables/table5.txt) mat(prediction_bstrap) ///
	format(%20.4f) title(<tab:table5>) append
	
****************
** Table 6  ****
****************
* Regress the published indicator on stdz_outcome_relative_all, separately for
* sponsor==1 (first column) and sponsor==0 (second column). Each column stores
* the coefficient, its standard error, the overall mean of published and the sum
* of weights over the estimation sample.

clear matrix
use "../derived/Combined/combined_pair_level.dta", clear

* published equals 1 when year is non-missing and 0 otherwise
gen published = year!=.

* Unweighted mean over all rows, reported in both columns
summarize published
local pub_mean `r(mean)'

tempvar wsum
foreach grp in 1 0 {
	areg published stdz_outcome_relative_all i.scale_group  [aw=weight] if sponsor==`grp', a(factor) cluster(cluster_group)
	egen `wsum' = total(weight) if e(sample)==1
	summarize `wsum'
	matrix published = nullmat(published), ///
		(_b[stdz_outcome_relative_all] \ _se[stdz_outcome_relative_all] \ `pub_mean' \ `r(mean)' )
	drop `wsum'
}
matrix list published
matrix_to_txt, saving(../tables/table6.txt) mat(published) format(%20.4f) title(<tab:table6>) append

	
	
****************
** Table 7 ****
****************
* Three columns of 12 rows each. Rows 1-2 hold the sponsor coefficient and its
* standard error, rows 3-6 the post terms, rows 7-10 the linked terms, row 11
* the outcome mean and row 12 the sum of weights. Unused cells are missing.

clear matrix
use "../derived/Combined/combined_pair_level.dta", clear
merge m:1 studyname using "../derived/NCT_registry/registry_link.dta", keepusing(linked) nogen
* post flags rows with a non-missing year of 2006 or later
gen post = year>=2006 & year!=.
keep if year!=.

* NOTE(review): this mean is unweighted, unlike the weighted means used in
* Tables 1-4 - confirm that is intended.
summarize new_relative
local depmean `r(mean)'

* Column 1: baseline specification on rows where linked is non-missing
areg new_relative sponsor i.scale_group i.year_group  [aw=weight] if linked!=., a(factor) cluster(cluster_group)
egen total = total(weight) if e(sample)==1
summarize total
matrix linked = nullmat(linked), ///
	(_b[sponsor] \ _se[sponsor] \ J(8,1,.) \ `depmean' \ `r(mean)' )
drop total

* Column 2: sponsor interacted with post
areg new_relative i.sponsor##i.post i.year_group i.scale_group [aw=weight], a(factor) cluster(cluster_group)
egen total = total(weight) if e(sample)==1
summarize total
matrix linked = nullmat(linked), ///
	(_b[1.sponsor] \ _se[1.sponsor] \ _b[1.post] \ _se[1.post] \ ///
	_b[1.sponsor#1.post] \ _se[1.sponsor#1.post] \ J(4,1,.) \ `depmean' \ `r(mean)')
drop total

* Column 3: sponsor interacted with linked
areg new_relative i.sponsor##i.linked i.year_group i.scale_group [aw=weight], a(factor) cluster(cluster_group)
egen total = total(weight) if e(sample)==1
summarize total
matrix linked = nullmat(linked), ///
	(_b[1.sponsor] \ _se[1.sponsor] \ J(4,1,.) \ _b[1.linked] \ _se[1.linked] \ ///
	_b[1.sponsor#1.linked] \ _se[1.sponsor#1.linked] \ `depmean' \ `r(mean)' )


matrix list linked
matrix_to_txt, saving(../tables/table7.txt) mat(linked) format(%20.4f) title(<tab:table7>) append
	
